Antarctica - ASV analysis
Initialize
Libraries
Markdown
## Warning: package 'knitr' was built under R version 3.5.3
Get the Antarctica ASVs
| set id | description |
|---|---|
| 16 | Antar_2015_18S_V4 |
| 17 | Antar_2015_16S_plastid |
| 18 | Antar_2015_18S_V4_sorted |
- Only use asv for which supergroup_boot >= 90
rm(list = ls())
sample_type <- c("18S filter", "16S plastid", "18S sort")
asv <- list()
ps <- list()
long <- list()
# Export as specific data set as a phyloseq file
i <- 16
for (one_sample_type in sample_type) {
asv[[one_sample_type]] <- metapr2_export_asv(dataset_id_selected = i, export_phyloseq = FALSE,
export_xls = FALSE, boot_min = 90, boot_level = supergroup_boot, directory = "../dada2/")
ps[[one_sample_type]] <- asv[[one_sample_type]][["ps"]]
# Label samples with date
sample_data(ps[[one_sample_type]])$sample_label <- as.character(sample_data(ps[[one_sample_type]])$date)
# Add TFF to sample name sample_data(ps[[one_sample_type]])$sample_label <-
# str_c(sample_data(ps[[one_sample_type]])$metadata_code,
# str_replace_na(sample_data(ps[[one_sample_type]])$sample_concentration,
# ''), sep = '.' )
print(glue("Phyloseq - {one_sample_type}"))
print(ps[[one_sample_type]])
cat("============================\n")
i <- i + 1
}Phyloseq - 18S filter
phyloseq-class experiment-level object
otu_table() OTU Table: [ 2310 taxa and 123 samples ]
sample_data() Sample Data: [ 123 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 2310 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 16S plastid
phyloseq-class experiment-level object
otu_table() OTU Table: [ 442 taxa and 103 samples ]
sample_data() Sample Data: [ 103 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 442 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 18S sort
phyloseq-class experiment-level object
otu_table() OTU Table: [ 385 taxa and 60 samples ]
sample_data() Sample Data: [ 60 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 385 taxa by 8 taxonomic ranks ]
============================
[1] "sample_id" "file_name"
[3] "sample_name" "sample_code"
[5] "metadata_code" "replicate"
[7] "DNA_RNA" "fraction_name"
[9] "fraction_min" "fraction_max"
[11] "sample_concentration" "sample_sorted"
[13] "reads_total" "sample_remark"
[15] "metadata_id" "metadata_code_original"
[17] "project" "cruise"
[19] "station_id" "station_id_num"
[21] "year" "date"
[23] "time" "season"
[25] "depth_level" "depth"
[27] "substrate" "substrate_description"
[29] "substrate_description_detailed" "experiment_name"
[31] "experiment_time" "experiment_time_unit"
[33] "experiment_bottle" "experiment_condition"
[35] "latitude" "longitude"
[37] "site_name" "country"
[39] "oceanic_region" "bottom_depth"
[41] "temperature" "salinity"
[43] "pH" "O2"
[45] "fluorescence" "ice_coverage"
[47] "Chla" "NO2"
[49] "NO3" "PO4"
[51] "Si" "Chla_0.2_3 um"
[53] "bact_ml" "syn_ml"
[55] "peuk_ml" "neuk_ml"
[57] "crypto_ml" "virus_small_ml"
[59] "virus_large_ml" "metadata_remark"
[61] "sample_label"
Filtration
- Only use Station 6 (not 14)
- Only surface considered (5 m)
- Do not consider TFF samples
- Only keep photosynthetic groups abd exclude dinoflagellates
- Very important, must remove taxa that are not present in the filtered samples
for (one_sample_type in sample_type) {
ps[[one_sample_type]] <- ps[[one_sample_type]] %>% subset_taxa((division %in%
c("Chlorophyta", "Cryptophyta", "Rhodophyta", "Haptophyta", "Ochrophyta")) |
(class == "Filosa-Chlorarachnea")) %>% subset_samples(station_id ==
"6") %>% subset_samples(depth_level == "surface") %>% subset_samples(is.na(sample_concentration)) %>%
filter_taxa(function(x) sum(x) > 0, TRUE)
print(glue("Phyloseq - {one_sample_type}"))
print(ps[[one_sample_type]])
cat("============================\n")
}Phyloseq - 18S filter
phyloseq-class experiment-level object
otu_table() OTU Table: [ 394 taxa and 50 samples ]
sample_data() Sample Data: [ 50 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 394 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 16S plastid
phyloseq-class experiment-level object
otu_table() OTU Table: [ 236 taxa and 40 samples ]
sample_data() Sample Data: [ 40 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 236 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 18S sort
phyloseq-class experiment-level object
otu_table() OTU Table: [ 114 taxa and 16 samples ]
sample_data() Sample Data: [ 16 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 114 taxa by 8 taxonomic ranks ]
============================
Get the different fraction separately
fraction_filter <- c(0.2, 3, 20)
fraction_sort <- c("pico", "nano")
sample_type_filtered <- c(str_c(sample_type[1], fraction_filter, "um", sep = " "),
str_c(sample_type[2], fraction_filter, "um", sep = " "), str_c(sample_type[3],
fraction_sort, sep = " "))
# Export as specific data set as a phyloseq file
for (one_sample_type in sample_type[1:2]) {
for (one_fraction in fraction_filter) {
one_sample_type_filtered <- c(str_c(one_sample_type, one_fraction, "um",
sep = " "))
ps[[one_sample_type_filtered]] <- ps[[one_sample_type]] %>% subset_samples(fraction_min ==
one_fraction) %>% filter_taxa(function(x) sum(x) > 0, TRUE)
print(glue("Phyloseq - {one_sample_type_filtered}"))
print(ps[[one_sample_type_filtered]])
cat("============================\n")
}
}Phyloseq - 18S filter 0.2 um
phyloseq-class experiment-level object
otu_table() OTU Table: [ 196 taxa and 17 samples ]
sample_data() Sample Data: [ 17 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 196 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 18S filter 3 um
phyloseq-class experiment-level object
otu_table() OTU Table: [ 229 taxa and 18 samples ]
sample_data() Sample Data: [ 18 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 229 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 18S filter 20 um
phyloseq-class experiment-level object
otu_table() OTU Table: [ 201 taxa and 15 samples ]
sample_data() Sample Data: [ 15 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 201 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 16S plastid 0.2 um
phyloseq-class experiment-level object
otu_table() OTU Table: [ 112 taxa and 11 samples ]
sample_data() Sample Data: [ 11 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 112 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 16S plastid 3 um
phyloseq-class experiment-level object
otu_table() OTU Table: [ 142 taxa and 16 samples ]
sample_data() Sample Data: [ 16 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 142 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 16S plastid 20 um
phyloseq-class experiment-level object
otu_table() OTU Table: [ 120 taxa and 13 samples ]
sample_data() Sample Data: [ 13 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 120 taxa by 8 taxonomic ranks ]
============================
# Sorted samples
for (one_sample_type in sample_type[3]) {
for (one_fraction in fraction_sort) {
one_sample_type_filtered <- c(str_c(one_sample_type, one_fraction, sep = " "))
ps[[one_sample_type_filtered]] <- ps[[one_sample_type]] %>% subset_samples(fraction_name ==
one_fraction) %>% filter_taxa(function(x) sum(x) > 0, TRUE)
print(glue("Phyloseq - {one_sample_type_filtered}"))
print(ps[[one_sample_type_filtered]])
cat("============================\n")
}
}Phyloseq - 18S sort pico
phyloseq-class experiment-level object
otu_table() OTU Table: [ 65 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 65 taxa by 8 taxonomic ranks ]
============================
Phyloseq - 18S sort nano
phyloseq-class experiment-level object
otu_table() OTU Table: [ 70 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 70 taxa by 8 taxonomic ranks ]
============================
Normalize and transform to long form
for (one_sample_type in sample_type_all) {
cat(one_sample_type)
cat("\n============================")
ps[[one_sample_type]] <- phyloseq_normalize_median(ps[[one_sample_type]])
long[[one_sample_type]] <- phyloseq_transform_to_long(ps[[one_sample_type]])
cat("\n")
}18S filter
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 394 taxa and 50 samples ]
sample_data() Sample Data: [ 50 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 394 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 23202
16S plastid
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 236 taxa and 40 samples ]
sample_data() Sample Data: [ 40 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 236 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 28885
18S sort
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 114 taxa and 16 samples ]
sample_data() Sample Data: [ 16 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 114 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 27275
18S filter 0.2 um
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 196 taxa and 17 samples ]
sample_data() Sample Data: [ 17 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 196 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 14647
18S filter 3 um
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 229 taxa and 18 samples ]
sample_data() Sample Data: [ 18 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 229 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 24783
18S filter 20 um
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 201 taxa and 15 samples ]
sample_data() Sample Data: [ 15 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 201 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 29427
16S plastid 0.2 um
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 112 taxa and 11 samples ]
sample_data() Sample Data: [ 11 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 112 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 28471
16S plastid 3 um
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 142 taxa and 16 samples ]
sample_data() Sample Data: [ 16 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 142 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 31801
16S plastid 20 um
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 120 taxa and 13 samples ]
sample_data() Sample Data: [ 13 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 120 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 25641
18S sort pico
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 65 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 65 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 33258
18S sort nano
============================
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 70 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 70 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 21650
Treemaps
treemap_class <- list()
treemap_genus <- list()
for (one_sample_type in sample_type_all) {
treemap_class[[one_sample_type]] <- phyloseq_long_treemap(long[[one_sample_type]],
division, class, str_c(one_sample_type, " - Class"))
treemap_genus[[one_sample_type]] <- phyloseq_long_treemap(long[[one_sample_type]],
class, genus, str_c(one_sample_type, " - Genus"))
}ASVs bargraphs
bargraph_asv <- list()
bargraph_species <- list()
for (one_sample_type in sample_type_all) {
bargraph_asv[[one_sample_type]] <- phyloseq_long_bargraph(long[[one_sample_type]],
text_scaling = 0.75, n_bars = 20, use_asv = TRUE, title = one_sample_type)
bargraph_species[[one_sample_type]] <- phyloseq_long_bargraph(long[[one_sample_type]],
text_scaling = 0.75, n_bars = 20, use_asv = FALSE, title = one_sample_type)
} # Barplots per sample
bargraph_sample <- list()
for (one_sample_type in sample_type_all[4:11]) {
bargraph_sample[[one_sample_type]] <- plot_bar(ps[[one_sample_type]], x = "sample_label",
fill = "class") + geom_bar(aes(color = class, fill = class), stat = "identity",
position = "stack") + ggtitle(str_c("Class level - ", one_sample_type)) +
theme(axis.text.y = element_text(size = 10)) + theme(axis.text.x = element_text(angle = 0,
hjust = 0.5)) + coord_flip() + scale_fill_viridis_d() + scale_color_viridis_d()
print(bargraph_sample[[one_sample_type]])
}Heatmaps (using the 10% most abundant taxa)
Define function
Class
heatmap_class <- list()
for (one_sample_type in sample_type_all[4:11]) {
ps_heat <- tax_glom(ps[[one_sample_type]], taxrank = "class") %>% phyloseq_keep_abundant(contrib_min = 0.1)
heatmap_class[[one_sample_type]] <- plot_heatmap(ps_heat, method = "NMDS",
distance = "bray", taxa.label = "class", taxa.order = "division", sample.label = "sample_label",
sample.order = "sample_label", low = "beige", high = "red", na.value = "beige",
title = one_sample_type)
# plot(heatmap(otu_table(ps_heat)))
print(heatmap_class[[one_sample_type]])
}
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 7 taxa and 17 samples ]
sample_data() Sample Data: [ 17 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 7 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 13808
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 7 taxa and 18 samples ]
sample_data() Sample Data: [ 18 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 7 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 23366
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 5 taxa and 15 samples ]
sample_data() Sample Data: [ 15 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 5 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 27860
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 4 taxa and 11 samples ]
sample_data() Sample Data: [ 11 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 4 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 27592
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 5 taxa and 16 samples ]
sample_data() Sample Data: [ 16 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 5 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 31460
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 4 taxa and 13 samples ]
sample_data() Sample Data: [ 13 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 4 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 24881
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 4 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 4 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 32016
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 4 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 4 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 21054
Genus
heatmap_genus <- list()
for (one_sample_type in sample_type_all[4:11]) {
ps_heat <- tax_glom(ps[[one_sample_type]], taxrank = "genus") %>% phyloseq_keep_abundant(contrib_min = 0.1)
heatmap_genus[[one_sample_type]] <- plot_heatmap(ps_heat, method = "NMDS",
distance = "bray", taxa.label = "genus", taxa.order = "division", sample.label = "sample_label",
sample.order = "sample_label", low = "beige", high = "red", na.value = "beige",
title = one_sample_type)
print(heatmap_genus[[one_sample_type]])
}
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 11 taxa and 17 samples ]
sample_data() Sample Data: [ 17 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 11 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 12475
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 6 taxa and 18 samples ]
sample_data() Sample Data: [ 18 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 6 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 20234
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 9 taxa and 15 samples ]
sample_data() Sample Data: [ 15 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 9 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 26305
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 4 taxa and 11 samples ]
sample_data() Sample Data: [ 11 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 4 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 27286
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 7 taxa and 16 samples ]
sample_data() Sample Data: [ 16 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 7 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 30698
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 5 taxa and 13 samples ]
sample_data() Sample Data: [ 13 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 5 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 24093
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 6 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 6 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 29874
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 5 taxa and 8 samples ]
sample_data() Sample Data: [ 8 samples by 61 sample variables ]
tax_table() Taxonomy Table: [ 5 taxa by 8 taxonomic ranks ]
==========
The median number of reads used for normalization is 20315
Figures
Figure 2
treemap_18S_filt <- cowplot::plot_grid(treemap_class[["18S filter 0.2 um"]]$gg,
treemap_class[["18S filter 3 um"]]$gg, treemap_class[["18S filter 20 um"]]$gg,
ncol = 1)
treemap_18S_sort <- cowplot::plot_grid(treemap_class[["18S sort pico"]]$gg,
treemap_class[["18S sort nano"]]$gg, ncol = 1)
treemap_16S_filt <- cowplot::plot_grid(treemap_class[["16S plastid 0.2 um"]]$gg,
treemap_class[["16S plastid 3 um"]]$gg, treemap_class[["16S plastid 20 um"]]$gg,
ncol = 1)
fig_2 <- cowplot::plot_grid(bargraph_species[["18S filter"]]$gg, treemap_18S_filt,
bargraph_species[["18S sort"]]$gg, treemap_18S_sort, bargraph_species[["16S plastid"]]$gg,
treemap_16S_filt, ncol = 2, rel_widths = c(2, 1))
fig_2